package cn.sunxyz.spider.downloader;

import cn.sunxyz.spider.Page;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;

/**
 * {@link Downloader} implementation that fetches a URL with jsoup and wraps the
 * outcome (parsed document or I/O failure) in a {@link Page}.
 *
 * <p>Created by yangrd on 2018/6/14
 *
 * @see <a href="http://www.open-open.com/jsoup/">http://www.open-open.com/jsoup/</a>
 **/
public class JSoupDownloader implements Downloader {

    private static final Logger LOGGER = LoggerFactory.getLogger(JSoupDownloader.class);

    /** Timeout handed to jsoup for each request, in milliseconds. */
    private static final int TIMEOUT_MILLIS = 5000;

    /**
     * Issues an HTTP GET for {@code request} with browser-like headers and parses the response.
     *
     * <p>Only {@link IOException} is handled here; any unchecked exception raised while
     * connecting propagates to the caller.
     *
     * @param request the URL to download
     * @return {@code Page.of(request, document)} when the fetch succeeds, or
     *         {@code Page.of(request, exception)} when an {@link IOException} occurs
     */
    @Override
    public Page download(String request) {
        LOGGER.debug("download url :=> {}", request);
        try {
            // No explicit "Host" header: the HTTP client derives it from the request URL.
            // The previously hard-coded value ("www.kuaidaili.com") was wrong for every other site.
            final Document document = Jsoup.connect(request)
                    .header("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8")
                    // NOTE(review): "sdch" is advertised here — confirm the client can actually decode it.
                    .header("Accept-Encoding", "gzip, deflate, sdch")
                    .header("Accept-Language", "zh-CN,zh;q=0.8,en;q=0.6")
                    .header("Cache-Control", "max-age=0")
                    .header("User-Agent", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36")
                    // NOTE(review): this static cookie is attached to every request regardless of target — confirm it is still needed.
                    .header("Cookie", "Hm_lvt_7ed65b1cc4b810e9fd37959c9bb51b31=1462812244; _gat=1; _ga=GA1.2.1061361785.1462812244")
                    .header("Referer", "https://www.baidu.com/")
                    .timeout(TIMEOUT_MILLIS)
                    .get();
            return Page.of(request, document);
        } catch (IOException e) {
            // Trailing throwable argument makes SLF4J log the full stack trace, not just the message.
            LOGGER.error("url: {}  e : {}", request, e.getMessage(), e);
            return Page.of(request, e);
        }
    }
}
